# Preview the first rows of the per-review table (restaurant name, rating, review text)
df_ratings.head()
| | name | rating | review |
|---|---|---|---|
| 0 | Jalsa | 4.0 | A beautiful place to dine inThe interiors take... |
| 1 | Jalsa | 4.0 | I was here for dinner with my family on a week... |
| 2 | Jalsa | 2.0 | Its a restaurant near to Banashankari BDA Me a... |
| 3 | Jalsa | 4.0 | We went here on a weekend and one of us had th... |
| 4 | Jalsa | 5.0 | The best thing about the place is its ambiance... |
# Preview the first rows of the location table (name with latitude/longitude)
df_locations.head()
| | Name | latitude | longitude |
|---|---|---|---|
| 0 | Bangalore Banashankari | 12.915221 | 77.573598 |
| 1 | Bangalore Basavanagudi | 12.941726 | 77.575502 |
| 2 | Bangalore Mysore Road | 12.931409 | 77.506714 |
| 3 | Bangalore Jayanagar | 12.929273 | 77.582423 |
| 4 | Bangalore Kumaraswamy Layout | 12.906817 | 77.563525 |
# Preview the first rows of the main restaurant table
df_zomato.head()
| | name | online_order | book_table | rate | votes | location | rest_type | cuisines | avg_cost_for_2_people | reviews_list | menu_item | listed_type | listed_city |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Jalsa | Yes | Yes | 4.1 | 775 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800.0 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | Spice Elephant | Yes | No | 4.1 | 787 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800.0 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | San Churro Cafe | Yes | No | 3.8 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | Addhuri Udupi Bhojana | No | No | 3.7 | 88 | Banashankari | Quick Bites | South Indian, North Indian | 300.0 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | Grand Village | No | No | 3.8 | 166 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600.0 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
# Bar chart with one bar per location, counting the restaurant rows in each
fig = plt.figure(figsize=(20, 7))
loc = sns.countplot(data=df_zomato, x="location", palette="Set1")
# Turn the location names vertical so they do not run into each other
loc.set_xticklabels(loc.get_xticklabels(), rotation=90, ha="right")
loc.set_xlabel("Location", size=18)
loc.set_ylabel("Frequency", size=15)
loc.set_title('Number of restaurants in a Location', size=20, pad=20)
plt.show()
# The 20 restaurant names that occur most often in the dataset (i.e. the largest chains)
plt.figure(figsize=(15, 7))
chains = df_zomato['name'].value_counts().head(20)
sns.barplot(x=chains, y=chains.index, palette='Set1')
# NOTE(review): "locals" presumably means outlets/branches - confirm the intended label
plt.xlabel("Number of locals", size=15)
plt.title("Most famous restaurant chains in Bangalore", size=20, pad=20)
plt.show()
# Bar chart of how often each value of the online_order column occurs
plt.figure(figsize=(15, 7))
df_zomato['online_order'].value_counts().plot(kind='bar')
plt.ylabel('Frequency', size=15)
plt.title('Online orders', fontsize=20)
plt.show()
# Bar chart of how often each value of the book_table column occurs
plt.figure(figsize=(15, 7))
df_zomato['book_table'].value_counts().plot(kind='bar')
plt.ylabel('Frequency', fontsize=15)
plt.title('Booking Table', fontsize=20, pad=15)
plt.show()
# Histogram (20 bins) of the restaurant ratings
plt.figure(figsize=(15, 8))
rating = df_zomato['rate']
plt.hist(x=rating, bins=20, color="red")
plt.xlabel('Rating', size=15)
plt.ylabel('No. of restaurants', size=15)
plt.title('Restaurant rating distribution', size=20, pad=15)
plt.show()
# Approx cost for 2 people distribution
plt.figure(figsize=(15, 8))
# Pass the column as `x=` explicitly: the cost is meant to run along the x-axis
# (see the x label below), and a positional argument is interpreted differently
# depending on the seaborn version.
sns.violinplot(x=df_zomato['avg_cost_for_2_people'])
plt.title('Approx cost for 2 people distribution', size=20, pad=15)
plt.xlabel('Approx cost for 2 people', size=15)
plt.ylabel('Density', size=15)
plt.show()
The approximate cost for two people is mostly around 300-400 INR.
# Most popular cuisines of Bangalore
plt.figure(figsize=(15, 8))
# The 15 most frequent values of the cuisines column
cuisines = df_zomato['cuisines'].value_counts()[:15]
# x/y are given by keyword (as in the restaurant-chains plot): recent seaborn
# releases no longer accept them positionally.
sns.barplot(x=cuisines, y=cuisines.index)
plt.title('Most popular cuisines of Bangalore', size=20, pad=15)
plt.xlabel('No. of restaurants', size=15)
plt.show()
# Build the word cloud from the actual text of every review.
# str() on the underlying NumPy array would only give the array's printed
# representation, which NumPy abbreviates with "..." for large arrays, so most
# reviews would never reach the word cloud.
all_reviews = ' '.join(df_ratings['review'].dropna().astype(str))
wordcloud = WordCloud(max_font_size=None, max_words=100, background_color="black",
                      width=3000, height=2000, collocations=False,
                      stopwords=stopwords.words('english')).generate(all_reviews)
plot_wordcloud(wordcloud, 'English')
# Single analyzer instance reused for every review
rate_analyzer = SentimentIntensityAnalyzer()


def sentiment_analyzer_scores(sentence):
    """Return whatever ``rate_analyzer.polarity_scores`` yields for *sentence*.

    NOTE(review): presumably a mapping with neg/neu/pos/compound entries,
    matching the score columns shown for ``ratings`` - confirm.
    """
    return rate_analyzer.polarity_scores(sentence)
# Preview the ratings table, which here also carries neg/neu/pos/compound score columns
ratings.head()
| | name | rating | review | neg | neu | pos | compound |
|---|---|---|---|---|---|---|---|
| 0 | Jalsa | 4.0 | A beautiful place to dine inThe interiors take... | 0.062 | 0.799 | 0.139 | 0.7430 |
| 1 | Jalsa | 4.0 | I was here for dinner with my family on a week... | 0.027 | 0.684 | 0.289 | 0.9623 |
| 2 | Jalsa | 2.0 | Its a restaurant near to Banashankari BDA Me a... | 0.047 | 0.781 | 0.172 | 0.7964 |
| 3 | Jalsa | 4.0 | We went here on a weekend and one of us had th... | 0.000 | 0.725 | 0.275 | 0.9678 |
| 4 | Jalsa | 5.0 | The best thing about the place is its ambiance... | 0.000 | 0.605 | 0.395 | 0.9702 |
# all scores in 4 histograms
fig, axes = plt.subplots(2, 2, figsize=(10, 8))

# (column, colour, title) for each panel, in row-major order of the 2x2 grid.
# The titles now match the plotted column: 'neu' is Neutral and 'pos' is
# Positive (they were swapped before).
score_panels = [
    ('neg', 'lightcoral', 'Negative Sentiment Score'),
    ('neu', 'lightsteelblue', 'Neutral Sentiment Score'),
    ('pos', 'chartreuse', 'Positive Sentiment Score'),
    ('compound', 'navajowhite', 'Compound'),
]
for panel_ax, (column, colour, panel_title) in zip(axes.flat, score_panels):
    ratings.hist(column, bins=25, ax=panel_ax, color=colour, alpha=0.6)
    panel_ax.set_title(panel_title)

# plot common x- and y-label
fig.text(0.5, 0.04, 'Sentiment Scores', fontweight='bold', ha='center')
fig.text(0.04, 0.5, 'Number of Reviews', fontweight='bold', va='center', rotation='vertical')
# plot title
plt.suptitle('Sentiment Analysis of Zomato reviews\n\n', fontsize=12, fontweight='bold');
# Rows whose compound score is at least 0.95 are treated as POSITIVE comments
df_pos = ratings[ratings['compound'] >= 0.95]
# Plain list holding just the review text of those rows
pos_comments = df_pos['review'].to_list()
pos_comments[:3]
['I was here for dinner with my family on a weekday The restaurant was completely empty Ambience is good with some good old hindi music Seating arrangement are good too We ordered masala papad panner and baby corn starters lemon and corrionder soup butter roti olive and chilli paratha Food was fresh and good service is good too Good for family hangout\nCheers', 'We went here on a weekend and one of us had the buffet while two of us took Ala Carte Firstly the ambience and service of this place is great The buffet had a lot of items and the good was good We had a Pumpkin Halwa intm the dessert which was amazing Must try The kulchas are great here Cheers', 'The best thing about the place is its ambiance Second best thing was yummy food We try buffet and buffet food was not disappointed us\nTest \nQuality \nService Staff was very professional and friendly\n\nOverall experience was excellent\n\nsubirmajumder85wixsitecom']
# Rows whose compound score is below zero are treated as NEGATIVE comments
df_neg = ratings[ratings['compound'] < 0.0]
# Plain list holding just the review text of those rows
neg_comments = df_neg['review'].to_list()
neg_comments[:3]
['I had a very bad experience here\nI dont know about a la carte but the buffet was the worst They gave us complementary drink and momos before the buffet The momos were really good\nThe number of varieties first of all was very disappointing The service was very slow They refilled the food very slowly The starters were okay The main course also was so so There was two gravies with roti and some rice with raitha They had chats sev puri and pan puri which was average But the desert was disappointing They had gulab Jamun and chocolate cake The jamun was not cooked inside There was a cold blob of raw dough inside The chocolate cake also was really hard and not that good\nOverall the buffet was a bad experience for me', 'Spice elephant soup SPL almost manchow flavour soup Just above medium spicy\n\nLasooni fish tikka was awesome\n\nI dont remember the dessert name but I have attached the photo It had vanilla ice inside wafers Wafer was hell hard egg smell chewy Nightmare dessert \n\nTable leg space was very bad I was so uncomfortable the whole time kept on adjusting my legs\n\nNo parking\n\nFor the taste felt this is too costly', 'Ambience is not that good enough and its not a pocket friendly cafe and the quantity is not that good and desserts are too good enough ']
sns.set_style("whitegrid")
plt.figure(figsize=(8, 5))
# Overlay both length distributions on the same axes: positive first, then
# negative, so the legend entries below line up with the drawing order.
# NOTE(review): seaborn documents distplot as deprecated, and its y-axis is
# presumably a density rather than a percentage - confirm before relabelling.
for subset, colour in ((df_pos, 'chartreuse'), (df_neg, 'lightcoral')):
    sns.distplot(subset['text_length'], kde=True, bins=50, color=colour)
plt.legend(['Positive Comments', 'Negative Comments'])
plt.title('\nDistribution Plot for Length of Comments\n')
plt.xlabel('\nText Length')
plt.ylabel('Percentage of Comments\n');
It looks like positive comments tend to be longer than negative ones.
# For every topic print a header line, then the topic id and its weighted-terms string
for topic_id, topic_terms in model_loaded.print_topics():
    print('Topic number {} \n'.format(topic_id))
    print(topic_id)
    print(topic_terms)
Topic number 0 0 0.029*"food" + 0.022*"order" + 0.015*"time" + 0.012*"delivery" + 0.011*"bad" + 0.010*"ordered" + 0.010*"dont" + 0.009*"restaurant" + 0.009*"even" + 0.008*"service" Topic number 1 1 0.045*"place" + 0.039*"good" + 0.038*"food" + 0.019*"service" + 0.015*"ambience" + 0.013*"great" + 0.010*"nice" + 0.010*"staff" + 0.009*"visit" + 0.008*"really" Topic number 2 2 0.019*"pizza" + 0.016*"cake" + 0.015*"place" + 0.010*"try" + 0.010*"chocolate" + 0.010*"good" + 0.010*"cheese" + 0.010*"one" + 0.009*"ordered" + 0.009*"cream" Topic number 3 3 0.049*"chicken" + 0.030*"good" + 0.021*"biryani" + 0.020*"ordered" + 0.019*"taste" + 0.019*"food" + 0.013*"rice" + 0.010*"paneer" + 0.010*"veg" + 0.009*"quantity"
# One word cloud per topic, built from that topic's word -> frequency mapping.
# The four clouds only differ in their input and title, so they are drawn in a
# loop. No stop-word list is passed: the frequencies are supplied directly, and
# wordcloud documents `stopwords` as ignored by generate_from_frequencies.
topic_clouds = [
    (first_topic_words, '\nWord cloud of First Topic'),
    (second_topic_words, '\nWord cloud of Second Topic'),
    (third_topic_words, '\nWord cloud of Third Topic'),
    (fourth_topic_words, '\nWord cloud of Fourth Topic'),
]
for topic_words, cloud_title in topic_clouds:
    wordcloud = WordCloud(background_color="black", width=2500,
                          height=1800).generate_from_frequencies(topic_words)
    plot_wordcloud(wordcloud, cloud_title)
# Value reported by the model's log_perplexity on the document-term matrix.
# NOTE(review): gensim documents log_perplexity as returning a per-word
# likelihood bound (perplexity = 2^(-bound)), so this number is on a log scale
# and is not the perplexity itself - confirm how it should be read.
perplexity_bound = model_loaded.log_perplexity(doc_term_matrix)
print('\nPerplexity: ', perplexity_bound)

# c_v coherence of the same model over the cleaned review texts
coherence_model_lda = CoherenceModel(
    model=model_loaded,
    texts=ratings_topic_model['text_cleaned'],
    dictionary=dictionary,
    coherence='c_v',
)
coherence_lda = coherence_model_lda.get_coherence()
print('\nCoherence Score: ', coherence_lda)
Perplexity: -7.461145081504907 Coherence Score: 0.5021451296691057
# Switch pyLDAvis to notebook output, prepare the visualisation data from the
# topic model, its document-term matrix and dictionary, and display it.
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(model_loaded, doc_term_matrix, dictionary)
vis
def filter_by(df, cuisines, rate):
    """Filter a restaurant dataframe by cuisine and minimum rate.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'cuisines' (text) column and a 'rate' column.
    cuisines : str or None
        Keep only rows whose 'cuisines' text contains this value. The value is
        matched literally (not as a regular expression). Skipped when falsy.
    rate : float or None
        Keep only rows whose 'rate' is greater than or equal to this value.
        Skipped when falsy.

    Returns
    -------
    pandas.DataFrame
        The filtered rows; `df` itself when no filter is applied.
    """
    df_outcome = df
    if cuisines:
        # regex=False: names such as "Cafe (Desserts)" must match as written.
        # na=False: rows without a cuisine simply do not match, instead of
        # producing a NaN mask that cannot be used for boolean indexing.
        matches_cuisine = df_outcome['cuisines'].str.contains(cuisines, regex=False, na=False)
        df_outcome = df_outcome[matches_cuisine]
    if rate:
        df_outcome = df_outcome[df_outcome['rate'] >= rate]
    return df_outcome


def get_recommendations(name, cuisines = None, rate = None):
    """
    Returns a dataframe that recommends restaurants based on name, reviews, cuisine and rate.

    Parameters
    ----------
    name : str
        Restaurant name that you liked
    cuisines : str, optional
        Type of cuisine you are looking for
    rate : float, optional
        Minimum rate you want for the recommended restaurants

    Returns
    -------
    pandas.DataFrame
        Up to 10 similar restaurants (cuisines, rate, avg_cost_for_2_people),
        sorted by rate in descending order and then filtered with `filter_by`.

    Raises
    ------
    IndexError
        If `name` does not occur in `indices`.
    """
    columns = ['cuisines', 'rate', 'avg_cost_for_2_people']

    # Find the index of the hotel entered
    matches = indices[indices == name].index
    if len(matches) == 0:
        raise IndexError('restaurant %r not found' % (name,))
    idx = matches[0]

    # Order all restaurants by their cosine similarity to it, biggest first
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)

    # Positions of the 31 highest scores
    # NOTE(review): presumably the first hit is the queried restaurant itself,
    # leaving 30 others - confirm.
    top30_indexes = list(score_series.iloc[0:31].index)

    # Names of those restaurants, looked up in one step instead of rebuilding
    # the whole index as a list on every iteration
    recommend_restaurant = df_percent.index[top30_indexes]

    # One randomly sampled row per recommended name (a name can own several
    # rows). pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    sampled_rows = [
        df_percent.loc[df_percent.index == each, columns].sample()
        for each in recommend_restaurant
    ]
    df_new = pd.concat(sampled_rows)

    # Drop rows that share cuisines, rate and cost, then keep the 10 best rated
    df_new = df_new.drop_duplicates(subset=columns, keep=False)
    df_new = df_new.sort_values(by='rate', ascending=False).head(10)

    # Filter before printing so the announced count matches the rows returned
    df_new = filter_by(df_new, cuisines, rate)
    print('TOP %s RESTAURANTS LIKE %s : ' % (str(len(df_new)), name))
    return df_new
# Restaurants similar to 'Jalsa', limited to North Indian cuisine rated 4.5 or higher
get_recommendations('Jalsa', 'North Indian', 4.5)
TOP 2 RESTAURANTS LIKE Jalsa :
| | cuisines | rate | avg_cost_for_2_people |
|---|---|---|---|
# Restaurants similar to 'China Bowl', limited to Chinese cuisine rated 3 or higher
get_recommendations('China Bowl', 'Chinese', 3)
TOP 4 RESTAURANTS LIKE China Bowl :
| | cuisines | rate | avg_cost_for_2_people |
|---|---|---|---|
| Yo! Chow | Chinese, Momos | 4.4 | 800.0 |
| Green Onion | Chinese | 4.3 | 550.0 |
| Green Onion | Chinese | 3.3 | 400.0 |
| Chinese Street | Chinese | 3.1 | 650.0 |
# Restaurants similar to 'Dabba Karkhana', without any cuisine or rate filter
get_recommendations('Dabba Karkhana')
TOP 8 RESTAURANTS LIKE Dabba Karkhana :
| | cuisines | rate | avg_cost_for_2_people |
|---|---|---|---|
| Hunger Meals | South Indian, North Indian, Biryani | 3.8 | 400.0 |
| Agarwal Food Service | North Indian, Chinese, Biryani | 3.8 | 400.0 |
| Cinnamon | North Indian, Asian, Continental | 3.7 | 1000.0 |
| Swad Punjab Da | North Indian | 3.7 | 150.0 |
| Melange - Hotel Ekaa | North Indian, Chinese, Continental, Mangalorean | 3.2 | 900.0 |
| Mast Kalandar | North Indian | 2.5 | 450.0 |
| Mast Kalandar | North Indian | 2.4 | 450.0 |
| Mast Kalandar | North Indian | 2.3 | 450.0 |
import os

import plotly.express as px

# Read the Mapbox access token from the environment instead of embedding it in
# the source. Set MAPBOX_ACCESS_TOKEN before running this cell; a missing
# variable raises KeyError.
# NOTE(review): the token that used to be hard-coded here has been published
# with the file and should be rotated.
px.set_mapbox_access_token(os.environ['MAPBOX_ACCESS_TOKEN'])

# One bubble per location, sized and coloured by its restaurant count
fig = px.scatter_mapbox(location_coords, lat="latitude", lon="longitude", color="rest_count", size="rest_count",
                        hover_name='short_name', color_continuous_scale=px.colors.cyclical.IceFire, size_max=60, zoom=12)
fig.show();